{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Explore BABEL\n",
    "\n",
    "This notebook provides code to explore BABEL by computing summary statistics and searching for specific actions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preparing the environment\n",
    "# Reload edited modules automatically before each cell is executed\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "# Render figures as static images in the cell output. One backend magic is\n",
    "# enough: a later `%matplotlib` call replaces whatever an earlier one selected.\n",
    "%matplotlib inline\n",
    "\n",
    "import sys, os, pdb\n",
    "from os.path import join as ospj\n",
    "import json\n",
    "from collections import *\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas.core.common import flatten\n",
    "\n",
    "import pprint\n",
    "pp = pprint.PrettyPrinter()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load BABEL \n",
    "Note that the test set is not loaded; only the dense (`train`, `val`) and extra (`extra_train`, `extra_val`) files are read."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "d_folder = '../data/babel_v1.0_release' # Data folder\n",
    "l_babel_dense_files = ['train', 'val']  \n",
    "l_babel_extra_files = ['extra_train', 'extra_val']\n",
    "\n",
    "# BABEL Dataset: { file name: parsed contents of <d_folder>/<file name>.json }\n",
    "# The dense files are loaded first, followed by the extra files.\n",
    "babel = {}\n",
    "for file in l_babel_dense_files + l_babel_extra_files:\n",
    "    # The context manager closes each file handle as soon as it has been parsed\n",
    "    with open(ospj(d_folder, file+'.json')) as f_json:\n",
    "        babel[file] = json.load(f_json)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Duration of mocap for which BABEL action labels are available"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total duration = 30.0 hours 2.0 min. 32 sec.\n",
      "Total # seqs. =  8808\n",
      "------------------------------\n",
      "Total duration = 34.0 hours 43.0 min. 39 sec.\n",
      "Total # seqs. =  10576\n",
      "------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Report the labelled duration twice: once for the dense files alone, and\n",
    "# once for the dense and extra files together.\n",
    "for babel_set in [l_babel_dense_files, l_babel_dense_files+l_babel_extra_files]:\n",
    "    dur = 0.0\n",
    "    # A set gives constant-time membership tests. A sequence id that shows up\n",
    "    # in more than one file is only counted the first time it is seen.\n",
    "    seen_sids = set()\n",
    "    for spl in babel_set:\n",
    "        for sid in babel[spl]:\n",
    "            if sid not in seen_sids:\n",
    "                seen_sids.add(sid)\n",
    "                dur += babel[spl][sid]['dur'] \n",
    "\n",
    "    # Duration of each set. `dur` is a float, so the floor divisions below\n",
    "    # also yield floats and the hours/minutes are printed with a trailing '.0'.\n",
    "    minutes = dur//60\n",
    "    print('Total duration = {0} hours {1} min. {2:.0f} sec.'.format(\n",
    "                                            minutes//60, minutes%60, dur%60))\n",
    "    print('Total # seqs. = ', len(seen_sids))\n",
    "    print('-'*30)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Search BABEL for an action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_cats(ann, file):\n",
    "    \"\"\"Return the action categories attached to one annotation entry.\n",
    "\n",
    "    Args:\n",
    "        ann: Annotation dict of a single sequence. For files whose name\n",
    "            contains 'extra' the 'seq_anns' / 'frame_anns' keys are read\n",
    "            (labels from possibly multiple annotators); for all other files\n",
    "            the 'seq_ann' / 'frame_ann' keys are read.\n",
    "        file: Name of the file the annotation was loaded from, e.g. 'train'.\n",
    "\n",
    "    Returns:\n",
    "        (seq_l, frame_l): two flat lists of categories. A list is empty when\n",
    "        the corresponding annotation is None.\n",
    "    \"\"\"\n",
    "    is_extra = 'extra' in file\n",
    "\n",
    "    def collect(entry):\n",
    "        # No annotation of this kind -> no labels\n",
    "        if entry is None:\n",
    "            return []\n",
    "        # Handle a single annotation as a one-element list of annotations\n",
    "        annotations = entry if is_extra else [entry]\n",
    "        cats = []\n",
    "        for one_ann in annotations:\n",
    "            for seg in one_ann['labels']:\n",
    "                cats.append(seg['act_cat'])\n",
    "        return list(flatten(cats))\n",
    "\n",
    "    if is_extra:\n",
    "        seq_l = collect(ann['seq_anns'])\n",
    "        frame_l = collect(ann['frame_anns'])\n",
    "    else:\n",
    "        seq_l = collect(ann['seq_ann'])\n",
    "        frame_l = collect(ann['frame_ann'])\n",
    "    return seq_l, frame_l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# Seqs. containing action jump = 746\n",
      "# Segments containing action jump = 1597\n"
     ]
    }
   ],
   "source": [
    "# Search every loaded file for sequences labelled with `action`\n",
    "action = 'jump'\n",
    "act_anns = defaultdict(list) # { seq_id_1: [ann_1_1, ann_1_2], seq_id_2: [ann_2_1], ...} \n",
    "n_act_spans = 0\n",
    "\n",
    "for spl in babel:\n",
    "    for sid in babel[spl]:\n",
    "        # Sequence-level and frame-level categories are pooled for the search\n",
    "        seq_l, frame_l = get_cats(babel[spl][sid], spl)\n",
    "        n_hits = (seq_l + frame_l).count(action)\n",
    "\n",
    "        if n_hits > 0:\n",
    "            # Store all relevant mocap sequence annotations\n",
    "            act_anns[sid].append(babel[spl][sid])\n",
    "            # Individual spans of the action in the sequence\n",
    "            n_act_spans += n_hits\n",
    "\n",
    "print('# Seqs. containing action {0} = {1}'.format(action, len(act_anns)))\n",
    "print('# Segments containing action {0} = {1}'.format(action, n_act_spans))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'babel_sid': 7692,\n",
      "  'dur': 3.83,\n",
      "  'feat_p': 'CMU/CMU/141/141_05_poses.npz',\n",
      "  'frame_ann': {'anntr_id': 'eab5b72f-7399-43a7-a752-e4ee2807faaf',\n",
      "                'babel_lid': '59ad905d-f378-4d2b-90a7-4e3222bbc1f7',\n",
      "                'labels': [{'act_cat': ['hop'],\n",
      "                            'end_t': 2,\n",
      "                            'proc_label': 'hop left',\n",
      "                            'raw_label': 'hopping left',\n",
      "                            'seg_id': 'daf942ad-7cbe-4387-b6a0-0fc391c702ea',\n",
      "                            'start_t': 1},\n",
      "                           {'act_cat': ['hop'],\n",
      "                            'end_t': 3,\n",
      "                            'proc_label': 'hop right',\n",
      "                            'raw_label': 'hopping right',\n",
      "                            'seg_id': '7b17f75e-3da9-4e56-aca1-9bbb6b8d5dd9',\n",
      "                            'start_t': 2},\n",
      "                           {'act_cat': ['stand'],\n",
      "                            'end_t': 1,\n",
      "                            'proc_label': 'stand',\n",
      "                            'raw_label': 'standing',\n",
      "                            'seg_id': '70687891-613e-42f7-87f4-5760f18a3548',\n",
      "                            'start_t': 0},\n",
      "                           {'act_cat': ['stand'],\n",
      "                            'end_t': 3.834,\n",
      "                            'proc_label': 'stand',\n",
      "                            'raw_label': 'standing',\n",
      "                            'seg_id': 'f0cdfd79-5dad-43f3-b2d1-8a0ce8668010',\n",
      "                            'start_t': 3}],\n",
      "                'mul_act': True},\n",
      "  'seq_ann': {'anntr_id': '30bf91ac-e0c1-4298-814f-7811fe634bac',\n",
      "              'babel_lid': 'da9d959f-f5b6-434f-a927-35effc7b5afe',\n",
      "              'labels': [{'act_cat': ['jump'],\n",
      "                          'proc_label': 'jump',\n",
      "                          'raw_label': 'jump',\n",
      "                          'seg_id': '082c172b-3883-4231-9c81-fcee4cf1a999'}],\n",
      "              'mul_act': True},\n",
      "  'url': 'https://babel-renders.s3.eu-central-1.amazonaws.com/007692.mp4'}]\n"
     ]
    }
   ],
   "source": [
    "# Pick one of the matching sequences at random and pretty-print everything\n",
    "# stored for it\n",
    "matching_sids = list(act_anns)\n",
    "key = np.random.choice(matching_sids)\n",
    "pp.pprint(act_anns[key])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
